import pandas as pd
from pandas import DataFrame
from bs4 import BeautifulSoup

# Scrape water-quality readings out of a saved HTML table and append them to
# the rows already stored in the CSV data file.

# Load the saved page; errors='ignore' drops undecodable bytes instead of raising.
with open('./assets/water_html.txt', 'r', encoding='utf-8', errors='ignore') as file:
    textInner = file.read()


soup = BeautifulSoup(textInner, 'lxml')
trList = soup.find_all("tr")

# One list per output column; all of them must stay the same length.
wq, wt, phv, do, er, mr, pt, nt = [], [], [], [], [], [], [], []

# (target list, index of the <td> cell holding that reading within a row)
cell_columns = (
    (wt, 5),    # water temperature
    (phv, 6),   # pH value
    (do, 7),    # dissolved oxygen
    (er, 8),    # conductivity
    (mr, 9),    # turbidity
    (pt, 12),   # total phosphorus
    (nt, 13),   # total nitrogen
)
# A row needs enough cells to satisfy the largest index used above.
min_cells = max(index for _, index in cell_columns) + 1

for node in trList:
    # Water quality type. Rows without this span (e.g. header rows) are
    # skipped instead of crashing on `None.text`.
    quality_span = node.find("span", attrs={"class": "sp_wt"})
    if quality_span is None:
        continue
    water_quality = quality_span.text
    if not water_quality:
        continue

    # Look the cells up once per row rather than once per column.
    cells = node.find_all("td")
    if len(cells) < min_cells:
        # Too few cells to read every column; skip the row so the column
        # lists never end up with different lengths.
        continue

    # Append only after the row is known to be complete.
    wq.append(water_quality)
    for values, index in cell_columns:
        values.append(cells[index].text)

# Existing data to extend.
# NOTE(review): the '.cvs' extension looks like a typo for '.csv' - confirm
# against the actual file name on disk before renaming.
DF1 = pd.read_csv("./assets/water_data.cvs")
DF = DataFrame({
    "水质": wq,
    "水温": wt,
    "ph值": phv,
    "溶解氧": do,
    "电导率": er,
    "浊度": mr,
    "总磷": pt,
    "总氮": nt,
})
# Stack the freshly scraped rows below the existing ones.
res = pd.concat([DF1, DF])
# Writing the merged table back is intentionally disabled; uncomment to persist.
# res.to_csv("./assets/water_data.cvs",index=False)